{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pyspark.sql import SparkSession \n",
    "spark=SparkSession.builder.appName('spark').getOrCreate()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "from PIL import Image\n",
    "import matplotlib.pyplot as plt\n",
    "import os\n",
    "from wordcloud import WordCloud, STOPWORDS ,ImageColorGenerator"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "mask = np.array(Image.open(\"./stormtrooper_mask.png\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "#text = open('bigdata.txt').read()\n",
    "from operator import add\n",
    "lines = spark.read.text('spark.txt').rdd.map(lambda r: r[0].lower())\n",
    "counts = lines.flatMap(lambda x: x.split(' ')) \\\n",
    "              .map(lambda x: (x, 1)) \\\n",
    "              .reduceByKey(add)\n",
    "df=counts.toDF()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "data=df.select(df._1.alias('words'),df._2.alias('counts'))\n",
    "data.createTempView(\"text\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+-----------+------+\n",
      "|      words|counts|\n",
      "+-----------+------+\n",
      "|        the|    60|\n",
      "|      spark|    51|\n",
      "|         of|    49|\n",
      "|          a|    48|\n",
      "|        and|    38|\n",
      "|         in|    33|\n",
      "|         to|    24|\n",
      "|     apache|    22|\n",
      "|         is|    21|\n",
      "|         as|    21|\n",
      "|        for|    16|\n",
      "|         on|    16|\n",
      "|       data|    16|\n",
      "|         by|    15|\n",
      "|distributed|    14|\n",
      "|       that|    12|\n",
      "|        can|    11|\n",
      "|         be|    11|\n",
      "|          =|    10|\n",
      "|         //|    10|\n",
      "+-----------+------+\n",
      "only showing top 20 rows\n",
      "\n"
     ]
    }
   ],
   "source": [
    "spark.sql(\"select * from text where counts >5 and words <> '' order by counts desc \").show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "data=spark.sql(\"select * from text where counts > 5 \\\n",
    "          and words not in ( '','the','of','a','in','and','to','is','as','on','by','for','be','at', \\\n",
    "          'that','can','=','//','are','was','an','such','or','with','which','it','its','this','has') \\\n",
    "          order by counts desc \")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "pd=data.toPandas()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>words</th>\n",
       "      <th>counts</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>spark</td>\n",
       "      <td>51</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>apache</td>\n",
       "      <td>22</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>data</td>\n",
       "      <td>16</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>distributed</td>\n",
       "      <td>14</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>rdd</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>val</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>machine</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>provides</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>algorithms</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>graphx</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>interface</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>cluster</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>streaming</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          words  counts\n",
       "0         spark      51\n",
       "1        apache      22\n",
       "2          data      16\n",
       "3   distributed      14\n",
       "4           rdd       9\n",
       "5           val       9\n",
       "6       machine       7\n",
       "7      provides       7\n",
       "8    algorithms       6\n",
       "9        graphx       6\n",
       "10    interface       6\n",
       "11      cluster       6\n",
       "12    streaming       6"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "word_list=pd.words\n",
    "count_list=pd.counts"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "dic=dict(zip(word_list,count_list))\n",
    "#word_list"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [],
   "source": [
    "wc = WordCloud(max_words=20, mask=mask,background_color=\"white\", repeat=True,width=600,height=600)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<wordcloud.wordcloud.WordCloud at 0xa7ec6a0>"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "wc.generate_from_frequencies(dic)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAS0AAAD8CAYAAAAi9vLQAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAAIABJREFUeJzt3XV0FFcbBvAnQhIkEDx4gmuxErSlUAhaAT60aIsXWlq80OLaIkVa3C24FHd3t0KRkEAIFIgQJ9n5/tjusJOdtaxO8vzO2ZOZO9cWyMudO3dmXARBABGRUrg6ugNEROZg0CIiRWHQIiJFYdAiIkVh0CIiRWHQIiJFYdAiIkVh0CIiRWHQIiJFcXd0B4zgcn2ijMPFlEwcaRGRojBoEZGiMGgRkaIwaBGRojBoEZGiMGgRkaIwaBGRojBoEZGiMGgRkaIwaBGRojBoEZGiMGgRkaIwaBGRojBoEZGiMGgRkaIwaBGRojBoEZGiMGgRkaIwaJHoQcxTu7bX5Pggu7ZH6YOLIDj1Y9idunPpTZPjg7C//mxHd4MyLj4jnuQ1OT4IVyPu4+sLk8S0qHex4k/NtiZvy5NDsDH0MBJVSWJar4tTsPTRLjQ9/oOYt8WJwbj45q44gkoRVBh2fT7anRmFJscHSfJq6km93/LkEGx7dlxyTLPNkRkBzv82HrKRqjlLY1nAKHE/R6askp/a/vroN520xTVGAgA2hh4W03Z/PAMAsL/+bMSnJMLDNRP8sxbA9MrfIi4lAR6umbAx9DDaFflUb780bS14sE1M04z+OhZtbPL3o/SLQSsD2l9/tjhqSevpoNyop8nxQciRKRtKeRfB6PLdAQB5PX0AAFncvJAiqJCW6YgmxwchIHd5XHh9B939W6Spv5R+MGhlUJpglSKo4OZi/ixB6mCnPR+2K+yU5R3UUsq7CCZU7G3VOkm5GLQyIO1RknbwWVtrnEkjMO2RmpuLK/Z8PFNn9BafkmhSH0xp75+3oWK+ZgVqY1Dp9iixYTIAYHrNlmjj/4HBtih94dVDcmqpr2hq9hm00iVePSTlK5w5L5Y+2oVXiVFocWIwfqnwtaO7RA7G00Nyaku1rnBqrk5SxsaRFhEpCkda6cyafy5jxs3jcHNxxXcV6qFr6Q/NrqPJnkV4HheNKQHN0aJoeZPLjbt8AEGPriGLu0ea255+/SjWPbiCXJ5Z8HmxChhU6WOTyy6/dwEzb56Aj4cXNnzaBYWy5jC57E8X92Dzo+uokrsQNjbqana/yX44EZ8OlA6aghQjf48PO/wkm66Z0H7Y4SdxO7U9TXuijE8+g+XT0rbGgaf30O/UFrPq0LR7pGU/NPzrzzS3baj/h1r0hb93LoPlyapMmohn0EoHQmMj8cmuP1Ayex7sb/5+PdPmxzcw/Pxf4r7cL3DqX1rtPB9s/g2xyepbd6YGtEDb4pV1ymsCZlraBoDux9bjZPhjcX9To66olqcwAODCvyHoeHgNhlduiN7lahnst5ebO263HSZ73FjATp2n7MZpeKdKAQDcbz8iTevYKE0YtEhNezSl71hajxsy7foRLLp7zmBZTf2+Wbxx+vOBJtet3a9l9dujfoESevP0KBOA0VUb6S1v7e9NacYlD2Q6fb+YlvzCDq/c0OBx7cBgTsBKTS5gaVt+74LeY7b43mRbDFpksqCH12xS75APPklzWTcXk/5zlrgdEW5WfmPzhWRfvHqYjrQ5uALXXofZrP7p14+ifYkqVm+7X/k6ae7T0MoNzC7T5+RmcduUCwknnj9Eg4IlzW6HbINBKx24G/kSLfctsXk7kUnxOmkN//oTT2IibN62Prk8s5hd5nlctFn5Oc5yLgxaCjfywm5sfHQdgPEJaUuVzpFXp21NwLJ129bk550LwW/fAODclRJxTkvhNAHL3dXV6IS0pabXbGmTtoec22lRv8y1pVE3u7ZH1sWglU7MqdPKovJv3xl+lAwAVMpVwCZtbwu+ZVF5c/l4ZrZre2RdDFrpxKAz22XTTT09q7JF/mZkU8qntW3tU7OuR9cbbccWnPH0lQzjnJbClcyeBw+iXyFJlYIOh1djw6ddAEgXdpqqxIbJqJDTF0GNuuDmm+foeHiNeExu7scabe9u2hMt9i3B6RePxQDybfm6eBobhR1P3o/ArD33pH3bkuZnr7I14eHmjq2Pb0om6znv5Vw40lK4/c174yNffwDAxX9DUWLDZJTYMFmyEr1X2ZpG69H8Yt6OCEfFTb8aDViatjXS2nZZn3w69c+/c1oSsGzlYYef0KRwGXF/8d/nMf/2aUnA8nTj/+vOhrfxOEhkWBH4FAy1ap09jm/AqfDHqJ3PD6sadDSpjNwtPo12L8CL+LeYVrMlmhcpZ3LbZ188QWv/Sphco7n5nf/PyAu7sePJbRTOmgNti1cxKeBaw77QvzHm8n5EJyWgedFymFHrc7u0SxK899CZGQtatghqcgzdl+hI1XvNErcvL/7BQE5KR3jvoTOKDCuCyLAi4v67+F06aZrtyLAiiI+eAgB4++oznXzp2eXFP9gkWG09cdPqdZJ9MWjZUVzkYPgUDJWMoDJl/kxM0wQkzXGfgqHInF39UlTvPLvgUzAUOQrcRWLscvt3Pp2YtPqQo7tAFuIsox0lxW1EFh/p0oK4yCF4F78Tntn6GywbGVYEbh5V4Zm5HVQp5t3w66zevI1D4x8XStJMGV1V7zVLkk97X/u0Uru+83dD0H/mFkmezoHV8UNb9ZNRGw9eiDfRcbL9qN5rFuZ89yW+m6Ne2lG1VCEsGdbOtC9JVsegZUcemVvrpCXFBYkjq4S3hl/c4J1nJ2Ij+sPVLX2cIjb+caFNTgHl6qxZriguL/5BJ+BpvImO0xsIAcA3V3bOrTkJBi07ypLzd8S+6QO3TKXFNJ+CoYh8XhzZ8x6W5FWfLvrBO+9euGUqB+98hxAdXg3Zfa+I81zW4GwT8Jbq3qwGqveaBVdXF1xcOMh4AQBnbz8BoDtK01aiUG6r9I8sx6BlZ1lzqU+HvLwHi2k+BR6pf6a6WuhTMFjcdnMvg+y+VwBAnOciXQNb18PA1vXw9dQgvaOq1FJSVKhepjAWDWlrhx6SpTgRT4oW/uatbPqyEe2xaGhb/Ln9jNE66n3gj8v3nlq7a2QjHGmRw2yZ0E3vxLl2umY79ZwTAOTKLn2elr76NFrULifm+blrY3z5UUUAQLli+SVlSxTMjY3j+CoxZ8TFpUTkLLi4lIjSHwYtIlIUBi0iUhQGLSJSFAYtIlIUBi0iUhQGLSJSFAYtIlIUBi0iUhQGLSJSFAYtIlIUBi0iUhQGLSJSFAYtIlIUBi0iUhQGLSJSFAYtIlIUBi0iUhQGLSJSFAYtBarVeSZqdZ7p6G4QOQSDlgKdW/NjmssePHvPij0hsj8GLQWo1XkmOgxbgddRsbLH7z/5V5IXALYcuo4+E4Lwaa954rEFm07j8Pl7WLDptCR/+2Er8OjpawDAvtN3UafrLHw1YhVe6nmnIJEj8b2HTq5W55lpGlkV9c2JuPgkHF48QEzr27YuDp69h8a1ywAAVuw4L9at3c6ZVcbfykzkKBxppQOZ3HX/GmtULIrVk7tApdL/6sjSxfKJ25accqZXzXz7262teSM2oJlvf7u2qVQcaTm5c2t+RK3OM+FXMBf6ta+H+tVL4tyNYADAoXP30KhWGfgXyo1anWcik7ubWK5W55moX70kjl9+IAlIP8/fje1Hb2D+T21Rp4o/anWeCf9CufH42WsGrlT2hv9hUr5mvv1NzqvP7hUnLK4jo+AbpolSCX0Qjt71xgN4H7gW/rwJfSa0FUdCmvTUIyNN+p0LDzH48xmStFN/XUW9llVNquPkriuY3GsJAGDYHz3QoHUN8bh2/tR1zPxrCMp9WDztX96xTHrDNARBcOYPkcM0zd9P3F4weqNkX9+2IAjCs0cvhR41f9E5fnLXFWFMlz+MtmXomFw+Y8cVxKS4wNPDdOpteGV4+143mCc6rCiyFwyxU4+Ub9j87ibl+6b2GADyc2JjV/UzqQ5BJaB5wW910ssHlJDNn5Hmwhi00ilBFeHoLqQ7rm6mXbeqULME+oz/H0pVLpbmtpoX/Fb29PHOhYey+TPSfBivHqZTHEFZRpWiMiv/qA5zsW+tev3bbzsG47sm0/DnqI0Y121BmkdBL5++wZTeS3XSe9YZiyXjtor1TtowEM18+2P74qMZYsTFiXgbiw4rKtnXDiZvw6vB2/eKJM/74wKiw4rppEeHFYVbpgrImnevpA3N8bg3PZGccECnLfWx7khOOAIAyJJ7NeJedxHzqFKeIeZFbdl+JkSPR1LMEq1jT2DqnClJ/ZsQg4/2zMTfrX+xet1lt463Sb12ZNI/Kp4e2oEmAAiqSJ15JH3zStFhxcT0+Ij+Yr7sBUN0AqG2LLmWiPWmlpxw5H3wey69whTzorZ47F38Vkm/kmKWcORmJXm9smFzg55Wqy8dBCqz8fTQjlxcfXTSsubdo5OWED1esp85p/75iuiwonD3ami0bUGIl+xnL/BI62ACXN3LiLuZMreWbYfS7mlspGx6lR2TAaiDj/bP8tsmSPbNoSlz/c1TlN06HrX++hUA8Mne2WbX5YwYtBzMLVNFnbTkhGMGy2QvGILo5yXF/Sy5VhhtJyXpAtw8qsgeexe/G67uRSCoosWPt+8tSXuaER6Dl3nuRoVjwrW9KJxV9z8sADjZfDAa75+LtfW7AwCGVGwEAFBZYdqmcq7CAIDqudV/Z8eaDUpTEHQ2DFpOKGueHcYzCUkAAFd30xYSunvWR0rSNdljmbK0QXLCIbi4Zpd8UsteMAReOSYh8e1ck9okYHfoLTQsUFoSLGKTk8RtlaBCaGwENgdfBQD8dusQzv37GF+Xqq1TFwCkCAISUt5J0s68fCSbN7UxV3djcMVP8effJ839Gs7F1AVdDvooXtSzIsK7hGOCIAhCzL+fC9FhpcVj0c+rGiynSokQt6PCSukcj3pWxGB52TqTn8uWj3pWRIh93U3cj3nZVNxOTrqVql6V3nbJPGW2jBMEQRBOhT9wcE+cgklxgVcPbUwzoR0d5g+vHBPhkfUrk8smxSxC4tuZ8C7wt9X6E/fmGwiq18iaZ7vOMVXKc8S8qAuPLO3g5TNVTE9JOof4iEEQkALv/Bet1hcCot8loM7u33CuxVBky+Tp6O44mklXDxm0bCy9rzov/9MscfvOZD7ShixiUtDinJaTaLB3DuK05jpOvHiItkeXYfbtY2Lax3t+x5XXoQCAbU+kt+gkq9SLIUtvmYDepzdIjlXeMVWyLwD4YPsUyWRv95NrUH3ndGt8FSLbMvU80kGfDGHuneOCIAhCp2MrxLRSm8dL8mjvN9w7VxAEQfji0CLZvL1Ordcpl5SSLKbte3pHEARBGHt1jyAIgvAy/m2a+15u5EzxYyu2rl8JzgX7mZSmcCbFBY60nECfMnUBAGOqNBPT/L1z680fGqu+r/BOZDgAwM3F+Kg6k+v7Z23tfXoXg85vRUSieu1WXq9sANSjtBTnni5I9168XY2bz5uL+w/+HYCnkdKXmFx79hESk5/Zu2tOg0HLCVTYpl5g2PLQQr153FzUf1UN9s7BjS9HAgDut/kZbY8uw93Wo/WW04SzD7ZPEdP2PL2N2TVbo1Px6gCAlQ8uiPUNvag7QU/2cf6JP/J7d0GlAnvE/ZJ556Gwz4+SPFUKnYSneyFHddPhOBFPFrH1RPzL6Bh8MnWxzep3JqGRv6GIzxBx//wTf9Qs9liyLZeWjvDeQ7KOT6YuxsvoGJ30XYO6Giy38cJNjN1+SO/x8gXzYfMA3SUg2oHQWLpcIEtKTkGVX+YY7JszBkAX/jqahH9KZFDTGcvFgJXZIxMujx2ANzFxqDd5IT6bvQr5smeTDWgAsO7c+xX4w1vUR7e61QC8Dz53wl6i4qjZuDVpkKTc3M6fi9un/glG0PkbOumGeGg9K1+uXQCy7TobzchKLs3NNauDeuV4PD0kgzS/6EVz+2Df4B6SYzXGzUds4vtlGuaMXkw9rQw6fwPjdhw2u3599t+8jx/W77ZafWRVXKdFlnn08o24nTpgAcDFMbqPAzaVowJGk0qlHdIuWQ+DFunVcvZKAEBA8cIO7gnRe5zTIqOmtm1qcR2dFmzAtZDnVuiNMtol22HQIqN8c3inuay+K4G25qh2yfYYtMioNzFxyJUti9nljE222yqwOKpdsg/OaZFRo7ceNLuMo57+wKdOpH8MWqTXd43rAACO/W3akzHlFPTRfQKqOdxc0/ZP1NJ2yXkxaJFefRvUFLcfvnytczw86q3ROsIio2XTTT1F+18N3Wfom8LSdtOTjuWGGM+kIAxaZFD+7OonQHw2exWqj50HAIhPeoeKo2aj4bQlKJIrh2y5Df06itv9Vr6/Cfv4vcdpDhzlf5qFpOQUAEBMYhJ+3XPCLu2Sc+GKeDKqzsQ/ERmXoJN+elRf5Mya+f1tOanmkKqPnYf4pHc65TR59ZVLLUUloNJo+ddfyZU9cOsfDFr3l978TWcsR8jrSKee85rSazGOb1M/2nrfq0UAgLi3CWjt/x28c2bFpn/Uf3ZN8/QGAGTO5oVtwer7LQWVgGb5+ohlO5YbgvV3fxPza+pzQqa9AdjUB2856ENOZOSmfcIHo38X1p29Zla579fuEiqOmiUM37jXovaP3n0oBIybL9SdtECYtOuoye12XhhkUbuO0CR3L71po9v/rnOshW9fvWU7lB2st04nwxdbEClZ0zy9JSOooNl70X5QM508rfo2wsENZ7D5gXo0umb6LnQe9pmYp2O5IYj4NxqD53ZH44517PcFzMd7D4mUbN+rRWLAAoDlE7cBAEa2UZ8aHgo6i32vFqHPxHaIiYwzWlfI/fRxZwAXlxI5Ic1cFfB+Tmvw3O5omqc3vHOqH0vTqH1tMd+EDd8ZrfObMW2cfU7LJDw9JFKIoN/3of33lt8H6sR4ekhE6Q9HWkTkLDjSIuWLj0lAk8xdZI/pS7e3yV3my/ZlQsc5TtNHOU0yd3Hq/unDoEUZ1uvnEehXc5TF9bi6mrYm0lJKDDC2wKDlRBofM2+F9sAr8qvEM4r98astKt+puPErbqYYsbK/xX0h0zFo2ZF2UGp87AfxAwAdz47Vm0cjPiVR3J96dw3+jn5iML+xfWelOW1p49sHmbN56T1uaOQxu/9SSb63EdI3Bs3suwQA8OhGiJhnwdA1Ou0AQP+ao8U8h9efNrsv2vlePXujk75i7GbZvBo3Tv4t7htq75c2M8Vj2+bt1zkuqATx+Ioxm+CeyU0njyKYunTeQZ90q9HRQQbT+l2aYbSOHucn6z12N+qJZF+7vuWP9pjSRYcI9OosbPh1pyAIgpCU8E4I9OosBHp11ptXTtvC/STH3kbECIfWnZIt3zfgJ4N9CfTqLNw+e18QBEFQqVSy+X5oMF62L+M7/K7T/0CvzkLv6iMk+8vHbJJtV9uJref1fl+5MoFenYXW+Xvr5FGlqL/DqglbDP7ZOohJcYEjLSdV1ruoWfm/Ojdesu+f1Vdvfd39pbeCOIvNs9Wvg28/RH0LSiZPd3hm8TC7nqhXb/Fpx7rifjafrJJ9cyy8NAXla5UCALi4pG3uSvvU8dOOdRF8+2ma6tHn5LYLOu3sj1+NmKj3q+SbZO4CF1cXuPw3/9ZldGur9sGeGLTsJPXpoJyDn8zCFydHAAC+K/0//Pr3evS/PMOk+tfW+gXNjg+B8N8SFk83D6x9cgBfnBopqa/VKcsnnm1l8cj1OmnzTk8wu5798atxeP1pNMncBSF3n1nUJ78K1n0T0bBlfa1aHwBM7DQXgPTUUe70cfW99PFoHt7GYycHP5llcF9jx0dTxe2hZTvK5tFYFjBSsr+3/m+S/a+KBeKrYoEm1+eM8hbOlaZy++NX48t8vdCr2ghxP6PL5evj6C5YBUdaCuW3cppN8jqST17dRyRvn687oWyq7S8Xq4PXt4FOs1zgxZNXOmnJSckW1Zkjj/ptSfvjV+t8tK2busOidpwFg1YGkMvL/DfpOEJQyHydtNRX1tKi15ROeo89uhFicf3GpPz3tFUA6FpWOj3gmcUDm2btNlrHh40+0HtsY+gfAIDIf+UfMQ0APSd3wJpJ24y2owS8jUeh/FZOQ3C34Y7uhtWlHhEtv/UbelQcIo4aDI2YDOWZsHUwAppVkaQtHrlenPwH1PNN2hP2TTJ3MXhaqa8vmjITOs7By5BXuH/lsexxffUEdvkIB1afNJpP+3jkv9FoX/RbvX2RK99qQFNsm7fPmU6dTbrSwaBlA34rp8HVxQUqQUDjIqVwMPQfAIB/9pw42kr9KJHiq6ZDlerPPnUQuvAiFO32rZPNowla2qd+p9v0Q6Fs70+xtI/JBTi/ldPwTfkaWHrnot58/iunSf4S0mOgJKfBxy07SrEVU41uP4+N1ikTn/xO3I9OTJDkl2sj9XF9+Q2lt9u7Vm+dxVdOE6quf/9o3/EXDhvsE5GFuE7Lmflm0X3V/PTLx8XtSutnY1OzrwzWMb2O5eutgpq+n+9JPYpKEQRc6fD+VpefazS0uL20qLL7Z7PLTLkl/2KL1BocfH+1ttqeMWa3Y6q0fAeSxyUPDlRr0x8Ij9P/7sAa+QyvEWpXSv/krLUo5cpjakFPzmNkxZZG8x1tPCJN9Qce/hUHPh2aprJkGY60HMRv5TScaN0Hwd2GO/U8kaZ/2h97GH51I3qcXYLbkYYXh2pGMI9iXhrN1/bEPLxKfKuTLrev+Vl73wTZvC8TpFfq9oXdxIXXj3Dw+S0cfH5bzC9AwI+XdRfNUtoxaDlAzyNbAAAeboZvWK0eNNce3XFK+8NuYnntnqjgU8hgvsACFVFv/yQUz5bPaJ2bPh6APJ66p+XarrVQB6lcHllN7yyAEVc3ove55Rh6JQhDr2wQ013ggpnVlbeo15kxaDnA1NrS53x32K/7P3Fwt+F4nRAnOX2ce+OMzfumbXi1+jqnh1U3zNGT2zGmV2uPU01Gof7BKRCseLE5JQ11XWsxQfyQ7XBOywHyZM6KgPxFxIBwo+MgZPfwxPgLhyX5Ui9paOlXzqx2tMsaW/4gp1+lWuhR/kNJWc2SDVv7JH9ZfHN2KX6q+JnBfK2Oz8G2+t+hZ4mPkZiSDC+3TBa1W2X3z7jWYgKiktQ3G6+t1wcrH51CKW9fuGrdMF0lZ1FEJsUhPiUJBTL7YF6NLhh2JQjflw3E+JvbsbBmDwCAAAETb+60qE8kxXVaJKt90xkI2jfY0d2gjIXPiCfT7N56GYEB4xEYoH68TWDAeES8iZXsD/92tbj/NOQ1AgPGo3tr9Zzb2qUnEBgwHu2aqJ9Isej3g5L6NHVob8sdDwwYjyQL78OjDMDUBV0O+pAdNK4xTietXZPf9B7X7C+ec1A2vUXdSULLepMEQRCEv289FQRBEPp0WqCTb9ywjbL79tDmrzVCsWXThGLLptmtTTKKi0uVJjzmLfznzhA/9nLgwi8ApKMhQzSP6e05sJFsuXwFciBLVk8AwLt3KdAnV55sYvvNak9Epx71zOs4ZUgMWk7EN5s3Hg+0/zyS5tSsiF8eMU379DC1NTu/l5zeVa3hj8CA8ShTQX55QodmM/Honxd660tMeIeUFBX6d11s4TehjIAT8U5IM8pyRABzhMCA8eJoz17+t3stLr1UL1wN7jHMrm2TXiZNxHPJgxX4z50hG2D8587ArT4DkdXDAykqFUrOlz6tdGCNWvixVtqeXZ6e2DtgkbIxaFlBNd+COoHr3mv1EyqzeqhfzODm6oq7/b6Hl7v6j/xkSDC67tjCoEVkJgYtK9jStqPOxHnTdSuxta30iZmagAUAHxX1s0fXjPJbPt3gcblTJ02ZHZ91QeU8BVAz6A+8iIvRyWfstKv5jhW480b/PYPGymv6oclXdd1cRCTG6+R72H0o3NLwJh3tPxueQjoPTsTbUFXfAjppJ0OC0WFrEKou1n20sL0ZC1jG9Di4BX7Lp8sGLGP1+y+fbjBgmds/fQELgCIC1i8HDmPkvoNWq6/W/IVWq8vZcKRlJcPqfCSeIh589EDnuGYkNrVhIDa0bi9JcwRTfimNBY03CepbXR51Hyq5xUUlCCi+4lexDrn6H/cYpvfYhfBQtNurvh/zpzMHMLlOoE4ebR+s/R3RSYk4+b8+KOKdQ3Ks474Nekrp54gR1vjAT60atM5920cn7Wb4C1TyzW+1NhyFIy0r6Vc9QNzuvXsHHgz4USfPhW/6on2FSvbsllG7Pu+q95gpv7DBPYZJAhYAuLq4oH1p48/60ld/gG8RcXvdvWtG64lOSkRwj2E6AQsA1jftYLS8NnsErDEHjwAASk7X/x7CPlvV9ysmq1S4FvYcANAtaAuuPAvD76fO4lWs+j+M8jPmoOT0WYhKSMCmG7fE8r22bDfaD037qX/WX7AUp4KfiPkmHj6GFJUKO27fNfk72hKDlpV136l+7IzcKYlmch4A+u/dZbc+GfLZzlU2qXda3abGMxng7mr/f5r2GmGNa2z8CbCHHzwEoP5z+H6n+uUbp5+EoFqhgvi+Xm3J6d+DYT8gh5cX2n5Q0ax+PBimfjPQsT7fSNKfRUejnl8xcX/f/X/g5uqKLyqYd8O+rTBoWdGBr7rj+JNg2WPl8uRFl+2bxdXuuTNn1smTeiW8LVfG//V5N3Hbb/l0JKTY7p6/gcfMD9DFs5v+klZrBBh7BaxKs+YhOjHRaL4Hw35ApVlz0Wb1ehzv+43R/JYw9h/EqX69MO/MebRZ7RwPM+SclhWVypVb74LQPR11T8MmfNJIsm/PxaQVc+dH8H/zSgBQdtVM8Zi1f2l3Pb6LuZ/IP2LG0osB1mDPOSxvT08cuP8AAUUMP9wwWaVC/LtkXH8ejtDIKBTxyYEvK5TDlWdhOBUcgmN9vja77Var1omjK1M9iYhEVg8PfFmhHFZduWp2m7bAFfF2NuPcacy7eM6pVrsnpCRLgpaGKRP0hn7JDeVLHazcXV0xtNrH+KJEefhmyYbyq2chLvmdSf1IS6DRXhGfmjMsbyg5fZYYYLS30zk+msYZDXbCxaRebu4I7jGe8kIVAAAIbElEQVTMaGBJq+wennqPeXt4IrjHMDzoNgR9KgXAN0s2q7Rpjkq5fSXf3RlGfxqp341JDFpWE5Hwfo1Qw9XLAKjnpGovX+jQpQ3mSh285lyz/BHPMz5qLtnXDgo3v/petoxmlGUPmiuo2t+77qYFdmtfjmZk5eriklFGWSZj0LKSaov/AABMO3MSR7qo5xseDxyMsz36oHhO0yeVnUXhbOrlAzOvnrK4rsZFS8mme7o535SqJnA9i4lGhTWzHdwbksOgZSWatUoLLl8Q0xr8N+KKe5fkkD5Z4mlMFACgSTH5gKNRea38iy5MOcVK1HPF0pZXMk1Rv5A/ACBWgX9vGQGDlpU8HPAjIhLi0a1yVQDA1fDnOPrfiCs8Rv42F0fyWz4dfsunY8aVk5L0F3ExkoCzsGErg/VEJSVI8guQBqzrX32nU0b7NKzEfyvnNaqsm4Oyq2biw3yGr67Z0srAtuK2M81vkRqvHmZQpvwymnL1cFPzTmi7Z51svsc9hum9HFRhzWy9I5mvK3yIXwIaGr06aK2rh/rK+y+fLv4DdIYrihmASVcPGbQyuEOhD/D71TO4G/ES3pk8MahqXXQrV81gGbmlDNFJiQjcthSebu5Y3Kg1Svvk0VdcYsP9Gxh77hAKZsuO0TUaoGGREmn/MqR0DFpkG3xkC9kI12kRUfrDoEVEisKgZaael7o7ugsZVvn1jlmk26zMCERHxBrNQ/bBoEVEipIhglbPS92x7dkW9LzUHXeib4tpmo92voFX+4lpvS9/jZ6XuuP7a9+i16Ueknx9L/fE0Bvq2yuG3xgsOUbv+a+eKvkJABU2qG/O9tNKG31+v06a3+qpCH4bgcGn/xLTSq/7FcmCSsy3K/guzoQ/QWxykqQspV/Odx+FjbQq1AYucEH57BXEtCUfrpDk0exrBx5T0qZ9MEPcTl1nemTOFcPHXUag6+EguLu6iWmdSlVR19NlBPoe34YF9Vvh63I18NmeFWKecRcP4U7Hwcjingkz6rYU0+93Giqpf+DJHWn8FkCPT6fjXVIy1pz8SedYp7oTkSd/DszZOlC2rKAS8GWVnzF4ajt83Nz4U1rJejJE0MrpkQs9L3WHC1zwZaHWevONujUckypOkz2W38vXYBuuLm6on/cTS7qpSCmCCm4u+gfsfqunIriLdL4nKSVFsh+X/A55vLJgV/PuqL5JfVtQsqCCu4F6taWu35BmZUZg772pkjmoQW3nY/ambyV5ACDiVYzsXFW3BtPwMiwCADDlh3X4dWiQye2T5TLE6WFE0hss+XAFFn+4XG+eR7EPZQPW2+RoAMCLhHC9ZZNUSZhdZQ5Uggr/xNy3vMN2Vm3PaFTbMxqdT/8JAPg7KgzV9owGAEy8uV08DgDfX1qNantGY+gV9VMs7/6X9+wr9cs8kgV1QLoTJX1Wlfap28p7l8W0BfVbwdPNHftD1X9uCcnq+w4nBASi9LpfERYbjdk39N+0vbxhWww4sR1RSQkoZ+JEvSZwaT6pA1a+gjklx1N7GRaBXbcnicdLVypsUrtkJYIgOPPHKuKS48Ttnhe7m1yu16Ue1uqC0zvy/LbeY+1OzBWq7h4lCIIg/vz86ExBEAQhPD5Kkr7onyPitj5/3DxrcX/Tqmnp4cK5w3cMHpdLi3oTIwiCIMTFJurNQxYzKS5kiJHWxlD1qGB9yFpMqsTJ2tQSU96hgW952WPV9oxG0EcD4OEqnUkIjXsNAMjjKX1o35/3D6OhnrqcRdW6hp9cYcjJvTes2BNKiwwRtLr5qZ+20LHoV8jnafp73xZVX2arLjmVzSEXsTXkIpoc0T09PtNkDJod+RUnAtWnh5ndPFBr31hcaT5RJ++1iCe40nwifqvWCTX3jdHbXr+KtazXeTsrWcFxT58gtQwRtEi/2ORElM9REPffhmNIueY6x73cMmFvw6HiSCtRlYxzTceKxzWT8FeaT0SVnO9fO3W+6TjbdtyGDm27rPdY8bK6bw2/dfGxLbtDqWSIq4ekX1Z3T1TN5YequfxMyn+p2XjbdsjB9tydgublRuJ5yBvUaVwBA1rJP+SwWZkRmLTsG1w78wCblhy3cy8zNo60iLS4uLpgwNgvse6PwxjQag523pwIz8yZJHn23puKPL45MOrrpdi05LjsFUayHT6ahoicBR9NQ0TpD4MWESkKgxYRKQqDFhEpCoMWZTjty/O59krGq4dE5Cx49ZCUr1XxQWjm2x9ju/6JZr79AQAdKw4HADQvoPV0Bt/+OLHjMo5sviBJ0/6ZOl1T/+dFv0NKskpMP779Eq6fuof42ESdsuR4XBFPTm3bo9n49tPJGLuqnxhA1t+ahtEd58HN/f2DBV1cXfDxF9UlZfeG/wEAWH11ssE2doZIV71P7Zsx7jlVKgYtUpx9a05j4voBSElWiWmCyrozCZqAR86Hp4ekOPNGqB811LLwAEn6+YM3cenIbYvrH7+2P6b0XoqYqDh86T/I4vrIujgRT0TOghPxRJT+MGgRkaIwaBGRojBoEZGiMGgRkaIwaBGRojBoEZGiMGgRkaIwaBGRojBoEZGiMGgRkaIwaBGRojBoEZGiMGgRkaIwaBGRojBoEZGiMGgRkaIwaBGRojBoEZGiMGgRkaIwaBGRojBoEZGiMGgRkaIwaBGRojBoEZGiMGgRkaIwaBGRojBoEZGiMGgRkaIwaBGRojBoEZGiuDu6A0a4OLoDRORcONIiIkVh0CIiRWHQIiJFYdAiIkVh0CIiRWHQIiJFYdAiIkVh0CIiRWHQIiJFYdAiIkVh0CIiRWHQIiJFYdAiIkVh0CIiRWHQIiJFYdAiIkVh0CIiRWHQIiJFYdAiIkVh0CIiRWHQIiJFYdAiIkVh0CIiRfk/uDaUc8wMO9YAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "plt.imshow(wc)\n",
    "plt.axis(\"off\")  # 不显示坐标轴\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<wordcloud.wordcloud.WordCloud at 0xa052b38>"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "wc.to_file('hdfs.png')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.16"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
